clear all
set more off
set mem 10000000
set matsize 10000
*version 13
version 12

********************************************** 
*** Collapse file for SECC Data **************
********************************************** 

** Set file paths
do "$path_code/paths.do"

************************************************************** 
************************************************************** 

** 1. Collapse individual-level datasets to the household level

forvalues st = 1/36 {
if inlist(`st',4)==0 {

	cd "$secc_i/st`st'"
	local dtfiles : dir . files "*.dta"
	foreach file of local dtfiles {
*if real(substr("`file'",29,1))>=5 {		
		use "`file'", clear
		
			// make sure a pca01_id column is present: the count is a harmless probe,
			// and the generate only succeeds when the variable does not exist yet
		capture count if pca01_id!=.
		capture generate pca01_id = .

			// drop households that should have been auto-excluded (drop specific households)
			// One list of household ids per village (pca01_id). In the original list, the
			// entries from village 524394 onward are all from state 36 district 3
			// (all failed on pucca; all have burnt brick walls).
		local hh_91869  052100119
		local hh_94926  030300106 030300066
		local hh_587589 026800116 026800058
		local hh_524394 005700003
		local hh_524390 003800003 003800004 003800005 003800006 003800012 003800016 ///
			003800018 003800019 003800020 003800021 003800023 003800024 003800025 ///
			003800026 003800028 003800032 003800064 003800074 003800075 003800076 ///
			003800077 003800078 003800080 003800081 003800083 003800101 003800106 ///
			003800109 003800110 003800112 003800114 003800117 003800118 003800119
		local hh_524389 003400060
		foreach vill in 91869 94926 587589 524394 524390 524389 {
			foreach hid of local hh_`vill' {
					// capture: a failing drop (e.g. type mismatch on hh_id) is ignored
				capture drop if pca01_id == `vill' & hh_id == "`hid'"
			}
		}

			// correct auto-exclusion criteria that were miscoded at the individual level, but not for the whole household
			// (we're treating the (very few) errors as coding errors, NOT erroneous inclusions in the SECC deprivations dataset)

		** i.  Motorized 2/3/4 wheeler/fishing boat (2 --> auto-exclusion)
		**     (this is consistently miscoded as Y=2, N=1 instead of Y=1, N=0)
		** ii. Mechanized 3-4 wheeler agricultural equipment (1 --> auto-exclusion)
		**     Both are created if absent and then set to 0 for every observation.
		foreach v in own_motor_veh mech_3_4_wheeler_agr_eqp {
			cap gen `v' = 0
			qui replace `v' = 0
		}

		** iii. Kisan credit card with credit limit of over Rs. 50,000/- (1 --> auto-exclusion)
		** v.   Households with non-agricultural enterprises registered with government (1 --> auto-exclusion)
		**      Both are created if absent; non-missing values are set to 0, missings stay missing.
		foreach v in credit_card own_oprt_ent_reg_wt_gvt {
			cap gen `v' = 0
			qui replace `v' = 0 if `v'!=.
		}

		** iv.   Household member government employee (YES --> auto-exclusion)
		** vii.  Paying income tax (YES --> auto-exclusion)
		** viii. Paying professional tax (YES --> auto-exclusion)
		*  (these categories were dropped in original data build because we felt they were not relevant to our analysis)

		** vi. Any member of household earning more than Rs. 10,000 per month (>10k --> auto-exclusion)
		assert inlist(highest_mnth_inc,"5-10","<5") if highest_mnth_inc!=""
		 
		** ix. 3 or more rooms with pucca walls and roof (3+pucca --> auto-exclusion)
			// flag pucca wall / roof materials
		qui gen pucca_walls = inlist(wall_mat,"BBK","CON","STM")
		qui gen pucca_roof = inlist(roof_mat,"BBK","CON","GMA","MMT","SLA","STO")
			// hh_rooms: create if absent, recode the string codes "AV"/"KUR" to "1", convert to numeric
			// (each step is captured, so steps that do not apply to this file are skipped)
		cap gen hh_rooms = 1
		cap replace hh_rooms = "1" if inlist(hh_rooms,"AV","KUR")
		cap destring hh_rooms, replace
			// Stata ranks numeric missing above every number, so "hh_rooms>=3" alone is TRUE
			// when hh_rooms is missing; exclude missing explicitly, as criteria xii and xiv
			// do with their "if ...!=." guards
		qui gen pucca_3room = hh_rooms>=3 & !missing(hh_rooms) & pucca_walls==1 & pucca_roof==1
		assert pucca_3room == 0
		cap drop pucca_walls pucca_roof pucca_3room

		** x. Owns a refrigerator (1 --> auto-exclusion)
			// create the variable if absent, then set every non-missing value to 0
		cap gen own_frig = 0
		qui replace own_frig = 0 if own_frig!=.

		** xi. Owns landline phone ("L" --> auto-exclusion)
			// create the variable (all "N") if absent
		cap gen own_phone = "N"
			// temp_mode:  household mode of own_phone computed over the non-"L" members only
			//             (defined only on those members because of the -if-)
			// temp_mode2: spreads that value to every member of the household
			// minmode breaks ties between modes by taking the lowest value
		qui egen temp_mode = mode(own_phone) if own_phone!="L", by(st_code dt_code bk_code gr_code tn_code hh_id pca01_id) minmode
		qui egen temp_mode2 = mode(temp_mode), by(st_code dt_code bk_code gr_code tn_code hh_id pca01_id) minmode
			// recode "L" to the household's non-"L" mode when that mode is "N" or "M"
		qui replace own_phone = temp_mode2 if own_phone=="L" & inlist(temp_mode2,"N","M")
			// any "L" that could not be recoded (or any other code) stops the run here
		assert inlist(own_phone,"N","M","")
		cap drop temp_mode*
		
		** xii. Owns more than 2.5 acres of irrigated land with 1 irrigation equipment (>2.5 & 1 --> auto-exclusion)
			// create the inputs if absent; blank out "YES"/"NO" strings and convert totalirr to numeric
			// (each step is captured, so steps that do not apply to this file are skipped)
		cap gen totalirr = 0
		cap gen irr_equip = 0
		cap replace totalirr = "" if inlist(totalirr,"YES", "NO")
		cap destring totalirr, replace
			// the check is only enforced where totalirr is non-missing
			// (a missing totalirr would otherwise satisfy "totalirr>2.5")
		qui gen crit12 = totalirr>2.5 & irr_equip==1
		assert crit12==0 if totalirr!=.
		cap drop crit12
		
		** xiii. 5 acres or more of irrigated land for two or more crop seasons (>5 --> auto-exclusion)
		assert totalirr<5 if totalirr!=.

		** xiv. Owning at least 7.5 acres of land or more with at least one irrigation equipment.
			// same clean-up as for totalirr, applied to the other two area variables
		cap gen totalunirr = 0
		cap gen otherirr = 0
		cap replace totalunirr = "" if inlist(totalunirr,"YES","NO")
		cap replace otherirr = "" if inlist(otherirr,"YES","NO")
		cap destring totalunirr otherirr, replace
			// all_land is missing whenever any of the three components is missing,
			// and the check below is skipped for those observations
		qui gen all_land = totalirr + totalunirr + otherirr
		qui gen crit14 = all_land>=7.5 & irr_equip==1
		assert crit14==0 if all_land!=.
		cap drop all_land crit14
		
			// create household and person identifiers that are (assuredly) unique
			// dup = number of OTHER observations sharing the same geographic codes + hh_id + person_id
		duplicates t st_code dt_code bk_code gr_code tn_code hh_id person_id, gen(dup)
		sort st_code dt_code bk_code gr_code tn_code hh_id person_id
			// duplicated records with a missing person_id get the placeholder 99
		replace person_id = 99 if person_id==. & dup>0
			// for offsets 1..8: a duplicated record whose dup, hh_id and person_id equal those of the
			// record `i' rows above it gets 1000*`i' added to person_id
			// (depends on the sort order established just above)
		forvalues i = 1/8 {
			replace person_id = 1000*`i'+person_id if dup>0 & dup[_n-`i']==dup & hh_id[_n-`i']==hh_id & person_id[_n-`i']==person_id
		}	
			// -unique- is not a built-in command (presumably the user-written SSC package -- confirm it is installed);
			// the assert stops the run unless the identifier combination is now unique
		unique st_code dt_code bk_code gr_code tn_code hh_id person_id
		assert r(unique)==r(N)
		drop dup
			// HH = numeric household id within this file; the -missing- option keeps observations
			// with missing key values (e.g. pca01_id) in groups of their own instead of leaving HH missing
		egen HH = group(st_code dt_code bk_code gr_code tn_code hh_id pca01_id), missing
		sort HH

			// Within-household person counts (n_*).
			// Each egen fills the count only on the observations matching its -if-; the loop at
			// the end of this block spreads the value to every household member and turns
			// "nobody matched" into 0.
			// count(person_id) counts non-missing person_id values only.
			// NOTE(review): a non-duplicated record with a missing person_id is therefore not counted -- confirm none exist.
			//
			// Stata ranks numeric missing above every number, so a bare "age>60" or "age>=16" is
			// TRUE when age is missing. The open-ended age conditions below exclude missing ages
			// explicitly so that people of unknown age are not counted as 61+ or as adults.
		local senior "age>60 & !missing(age)"
		local adult  "age>=16 & !missing(age)"

			// people in household by gender
		assert gender=="M" | gender=="F" | gender==""
		egen n_tot = count(person_id), by(HH)
		egen n_male = count(person_id) if gender=="M", by(HH)
		egen n_female = count(person_id) if gender=="F", by(HH)

			// people in household by age
		egen n_age_0_6 = count(person_id) if inrange(age,0,6), by(HH)
		egen n_age_7_15 = count(person_id) if inrange(age,7,15), by(HH)
		egen n_age_16_25 = count(person_id) if inrange(age,16,25), by(HH)
		egen n_age_26_40 = count(person_id) if inrange(age,26,40), by(HH)
		egen n_age_41_60 = count(person_id) if inrange(age,41,60), by(HH)
		egen n_age_61_plus = count(person_id) if `senior', by(HH)

			// people in household by gender and age
		egen n_m_age_0_6 = count(person_id) if inrange(age,0,6) & gender=="M", by(HH)
		egen n_m_age_7_15 = count(person_id) if inrange(age,7,15) & gender=="M", by(HH)
		egen n_m_age_16_25 = count(person_id) if inrange(age,16,25) & gender=="M", by(HH)
		egen n_m_age_26_40 = count(person_id) if inrange(age,26,40) & gender=="M", by(HH)
		egen n_m_age_41_60 = count(person_id) if inrange(age,41,60) & gender=="M", by(HH)
		egen n_m_age_61_plus = count(person_id) if `senior' & gender=="M", by(HH)
		egen n_f_age_0_6 = count(person_id) if inrange(age,0,6) & gender=="F", by(HH)
		egen n_f_age_7_15 = count(person_id) if inrange(age,7,15) & gender=="F", by(HH)
		egen n_f_age_16_25 = count(person_id) if inrange(age,16,25) & gender=="F", by(HH)
		egen n_f_age_26_40 = count(person_id) if inrange(age,26,40) & gender=="F", by(HH)
		egen n_f_age_41_60 = count(person_id) if inrange(age,41,60) & gender=="F", by(HH)
		egen n_f_age_61_plus = count(person_id) if `senior' & gender=="F", by(HH)

			// other within-household demographic counts, by age, for adults
		egen n_m_16_single = count(person_id) if `adult' & gender=="M" & marital=="N", by(HH) // M adults that are single and never married
		egen n_f_16_single = count(person_id) if `adult' & gender=="F" & marital=="N", by(HH) // F adults that are single and never married
		egen n_m_16_illit = count(person_id) if `adult' & gender=="M" & edu_level=="IL", by(HH) // M adults that are illiterate
		egen n_f_16_illit = count(person_id) if `adult' & gender=="F" & edu_level=="IL", by(HH) // F adults that are illiterate
		egen n_m_16_midsch = count(person_id) if `adult' & gender=="M" & inlist(edu_level,"MI","SE","HS","GR"), by(HH) // M adults with at least middle school education
		egen n_f_16_midsch = count(person_id) if `adult' & gender=="F" & inlist(edu_level,"MI","SE","HS","GR"), by(HH) // F adults with at least middle school education

			// occupations counts, within-household
			// (one set of four counters per occupation code present in this file;
			//  the code is lower-cased to build the variable name)
		levelsof occupation, local(jobs)
		foreach j in `jobs' {
			local j2 = lower("`j'")
			egen n_m_occ_`j2' = count(person_id) if gender=="M" & occupation=="`j'", by(HH) // M
			egen n_f_occ_`j2' = count(person_id) if gender=="F" & occupation=="`j'", by(HH) // F
			egen n_m_16_occ_`j2' = count(person_id) if gender=="M" & occupation=="`j'" & `adult', by(HH) // M adults
			egen n_f_16_occ_`j2' = count(person_id) if gender=="F" & occupation=="`j'" & `adult', by(HH) // F adults
		}

			// replace missings with zeros for all within-household person counts
			// (the household mean of a count equals the count itself, so this first copies the
			//  value onto members that did not match the -if-, then zero-fills what is left)
		foreach v of varlist n_* {
			egen temp = mean(`v'), by(HH)
			replace `v' = temp if `v' == .
			drop temp
			replace `v' = 0 if `v'==.
		}

			// harmonize SC/ST variable within household
			// priority: (1) own SC/ST code, (2) household mode of the SC/ST codes,
			//           (3) household mode of caste_group as recorded
		generate temp = caste_group if inlist(caste_group,"SC","ST")
		egen temp1 = mode(temp), by(HH) minmode
		replace temp = temp1 if temp=="" & temp1!=""
		egen temp2 = mode(caste_group), by(HH) minmode
		replace temp = temp2 if temp=="" & temp2!=""
			// nobody who reported a caste_group may end up without one
		assert temp!="" if caste_group!=""
		replace caste_group = temp
		drop temp*

			// if anyone in the household owns a phone, the whole household owns a phone
			// temp1 = own_phone with "N" blanked out, so temp2 is the household's modal
			// phone-ownership code among members that report a phone ("" if nobody does)
		gen temp1 = own_phone
		replace temp1 = "" if own_phone=="N"
		egen temp2 = mode(temp1), by(HH) minmode
			// Only overwrite when some member actually reports a phone (temp2 non-empty).
			// Without this guard, households where nobody owns a phone would have every
			// "N" replaced by "" (because "" != "N"), erasing the recorded answer.
		replace own_phone = temp2 if inlist(own_phone,"N","") & temp2!=""
		drop temp*

			// for categorical vars that SHOULD be constant within HH, replace discrepancies with within-HH mode
			// (minmode: ties between modes go to the lowest value; the assert checks that the
			//  household mode is only missing where the variable itself is missing)
		foreach v of varlist typeofhhd caste_group wall_mat roof_mat hh_own pub_priv highest_mnth_inc main_src_of_hh_inc own_phone {
			qui egen temp = mode(`v'), by(HH) minmode
			assert mi(`v')==1 if mi(temp)==1
			replace `v' = temp if mi(temp)==0
			qui drop temp
		}

			// for numeric/dummy vars that SHOULD be constant within HH, replace discrepancies with within-HH max
			// (every step is captured, so a variable missing from this file is skipped silently;
			//  destring ..., force turns non-numeric strings into missing)
		foreach v in hh_rooms bonded_labor manual_scavenger salaried_job own_oprt_ent_reg_wt_gvt  ///
						own_frig own_motor_veh mech_3_4_wheeler_agr_eqp irr_equip credit_card own_any_land  {
			cap destring `v', replace force
			cap egen temp = max(`v'), by(HH)
			*assert mi(`v')==1 if mi(temp)==1
			cap replace `v' = temp if mi(temp)==0
			cap drop temp
		}

			// for area vars that SHOULD be constant within HH, replace discrepancies with within-HH mean
			// (unlike the loop above, nothing is captured here: all three variables must exist)
		foreach v of varlist totalirr totalunirr otherirr {
			destring `v', replace force
			qui egen temp = mean(`v'), by(HH)
			assert mi(`v')==1 if mi(temp)==1
			replace `v' = temp if mi(temp)==0
			qui drop temp
		}

			// create head-of-household indicator (hh_head_2), exactly one non-zero row per household:
			//   1 = the single member flagged hh_head==1
			//   2 = no flagged head; the only member with occupation AGR/OTH/WRK
			//   3 = no flagged head; several such members, first one in list order chosen
			//   4 = none of the above; first member in list order
			//   0 = not the head
		egen temp_hashead = sum(hh_head), by(HH)
			// Order members by person_id inside each household. A plain "bysort HH:" leaves the
			// within-household order undefined (Stata's sort does not preserve the prior order
			// of tied observations), which made the tie-break and the list-order fallback below
			// irreproducible from run to run. person_id is unique within HH at this point.
		bysort HH (person_id): gen temp_listorder = _n
		gen hh_head_2 = .
		replace hh_head_2 = 1 if hh_head==1 & temp_hashead==1
		replace hh_head_2 = 2 if temp_hashead==0 & inlist(occupation,"AGR","OTH","WRK")
		egen temp_headcount = count(hh_head_2), by(HH)
		egen temp_minhead = min(temp_listorder) if hh_head_2==2, by(HH) // break ties -- pick the higher person in the list
		replace hh_head_2 = . if hh_head_2==2 & temp_headcount>1 & temp_minhead!=temp_listorder
		replace hh_head_2 = 3 if hh_head_2==2 & temp_headcount>1 & temp_minhead==temp_listorder
			// households still without a head (including those with more than one flagged head)
			// fall back to the first listed member
		egen temp_headcount2 = count(hh_head_2), by(HH)
		replace hh_head_2 = 4 if temp_headcount2==0 & temp_listorder==1
		replace hh_head_2 = 0 if hh_head_2==.
		lab def hhheadlab 1 "Is head" 2 "Head by job" 3 "Head by job & list order" 4 "Head by listorder only"
		label values hh_head_2 hhheadlab
		drop temp*

			// keep head-of-household only, rename relevant person-specific variables
			// (the number of distinct households is recorded before the keep; afterwards the assert
			//  checks that no household was lost and that exactly one row per household remains)
		unique HH
		local HH_count = r(unique)
		di `HH_count'
		keep if hh_head_2!=0
		unique HH
		assert r(unique)==`HH_count' & r(unique)==r(N)
			// person-level variables now describe the head: prefix them with hhh_
		foreach v of varlist gender age marital edu_level occupation other_occ {
			rename `v' hhh_`v'
		}

			// drop unnecessary variables, order, sort, rename
			// (after these renames, hh_id is the numeric group id HH and the original
			//  string household code lives on as hh_id_secc)
		drop person_id hh_head
		rename hh_head_2 hh_head
		order pca01_id HH st_code dt_code bk_code gr_code tn_code hh_id n_* hh_head hhh_*
		sort pca01_id HH
		rename dt_code dt_code11_secc
		rename bk_code bk_code11_secc
		rename gr_code gr_code11_secc
		rename tn_code vi_code11_secc
		rename hh_id hh_id_secc
		rename HH hh_id

			// new variable labels, and updating existing labels
			// (labels from the occupation counters onward are captured, so a variable
			//  that is absent from this district file is skipped rather than stopping the run)
		la var hh_id "Unique within-district household ID"
		la var st_code "State code"
		la var dt_code11_secc "2011 District code (dt_code11 reindexed)"
		la var bk_code11_secc "2011 Block code (bk_code11 reindexed)"
		la var vi_code11_secc "2011 Village code (vi_code11 reindexed, but not perfectly)"
		la var gr_code11_secc "2011 Grampanchayat code (unique to SECC?)"
		la var hh_id_secc "2011 Household code, for mapping back to original SECC files"
		la var n_tot "Total people in HH"
		la var n_male "Total males in HH"
		la var n_female "Total females in HH"
		la var n_age_0_6 "HH members, ages 0-6"
		la var n_age_7_15 "HH members, ages 7-15"
		la var n_age_16_25 "HH members, ages 16-25"
		la var n_age_26_40 "HH members, ages 26-40"
		la var n_age_41_60 "HH members, ages 41-60"
		la var n_age_61_plus "HH members, ages 61 and up"
		la var n_m_age_0_6 "M HH members, ages 0-6"
		la var n_m_age_7_15 "M HH members, ages 7-15"
		la var n_m_age_16_25 "M HH members, ages 16-25"
		la var n_m_age_26_40 "M HH members, ages 26-40"
		la var n_m_age_41_60 "M HH members, ages 41-60"
		la var n_m_age_61_plus "M HH members, ages 61 and up"
		la var n_f_age_0_6 "F HH members, ages 0-6"
		la var n_f_age_7_15 "F HH members, ages 7-15"
		la var n_f_age_16_25 "F HH members, ages 16-25"
		la var n_f_age_26_40 "F HH members, ages 26-40"
		la var n_f_age_41_60 "F HH members, ages 41-60"
		la var n_f_age_61_plus "F HH members, ages 61 and up"
		la var n_m_16_single "M HH members, 16+ and never married"
		la var n_f_16_single "F HH members, 16+ and never married"
		la var n_m_16_illit "M HH members, 16+ and illiterate"
		la var n_f_16_illit "F HH members, 16+ and illiterate"
		la var n_m_16_midsch "M HH members, 16+ and at least middle school education"
		la var n_f_16_midsch "F HH members, 16+ and at least middle school education"
			// occupation counters exist only for the occupation codes found in this file
		cap la var n_m_occ_agr "M HH members, occup: agriculture"
		cap la var n_m_occ_dep "M HH members, occup: dependent"
		cap la var n_m_occ_dom "M HH members, occup: domestic"
		cap la var n_m_occ_nil "M HH members, occup: none"
		cap la var n_m_occ_oth "M HH members, occup: other"
		cap la var n_m_occ_stu "M HH members, occup: student"
		cap la var n_m_occ_wrk "M HH members, occup: worker"
		cap la var n_f_occ_agr "F HH members, occup: agriculture"
		cap la var n_f_occ_dep "F HH members, occup: dependent"
		cap la var n_f_occ_dom "F HH members, occup: domestic"
		cap la var n_f_occ_nil "F HH members, occup: none"
		cap la var n_f_occ_oth "F HH members, occup: other"
		cap la var n_f_occ_stu "F HH members, occup: student"
		cap la var n_f_occ_wrk "F HH members, occup: worker"
		cap la var n_m_16_occ_agr "M HH members, 16+, occup: agriculture"
		cap la var n_m_16_occ_dep "M HH members, 16+, occup: dependent"
		cap la var n_m_16_occ_dom "M HH members, 16+, occup: domestic"
		cap la var n_m_16_occ_nil "M HH members, 16+, occup: none"
		cap la var n_m_16_occ_oth "M HH members, 16+, occup: other"
		cap la var n_m_16_occ_stu "M HH members, 16+, occup: student"
		cap la var n_m_16_occ_wrk "M HH members, 16+, occup: worker"
		cap la var n_f_16_occ_agr "F HH members, 16+, occup: agriculture"
		cap la var n_f_16_occ_dep "F HH members, 16+, occup: dependent"
		cap la var n_f_16_occ_dom "F HH members, 16+, occup: domestic"
		cap la var n_f_16_occ_nil "F HH members, 16+, occup: none"
		cap la var n_f_16_occ_oth "F HH members, 16+, occup: other"
		cap la var n_f_16_occ_stu "F HH members, 16+, occup: student"
		cap la var n_f_16_occ_wrk "F HH members, 16+, occup: worker"
			// head-of-household and remaining household variables
		cap la var hh_head "Flag for how HH head was assigned"
		cap la var hhh_gender "Gender of HH head"
		cap la var hhh_age "Age of HH head"
		cap la var hhh_marital "Marital status of HH head (Separaed, Nevermarried, Married, Divorced, Widowed)"
		cap la var hhh_edu_level "Education level of HH head (illiterate, primary, middle, secondary, grad)"
		cap la var hhh_occupation "Occupation of HH head (AGRiculture,DOMestic,STUdent,DEPendent,WoRKer,NIL,OTHer)"
		cap la var hhh_other_occup "HH head other occupation description"
		cap la var typeofhhd "Type of household (1 = normal)"
		cap la var own_oprt_ent_reg_wt_gvt "Own/operate govt-registered enterprise"

		// save household-level file
			// sanity checks: household id is never missing and identifies rows uniquely
		assert hh_id!=.
		unique hh_id
		assert r(unique)==r(N)
		duplicates drop
		compress
			// district number = the two characters before ".dta" in the file name,
			// with a leading "t" stripped for single-digit districts (e.g. "t3" -> "3")
		local dt = subinstr(substr("`file'",-6,2),"t","",1)
			// the district code inside the data must agree with the one in the file name
		assert dt_code11_secc==`dt'
		save "$secc_h/st`st'/secc_pca_hhold_rural_st`st'_dt`dt'.dta", replace
*}
*else { 
*di "`file'"
*}		
	}

}
}

************************************************************** 
************************************************************** 

** 2. Collapse household-level datasets to the village (PCA01_id) level
** 		(weighting each household equally AND by number of people) 

forvalues st = 1/36 {
if inlist(`st',4)==0 {

	cd "$secc_h/st`st'"
	cap erase "$secc_v/secc_pca_vill_st`st'_nowt.dta"
	cap erase "$secc_v/secc_pca_vill_st`st'_popwt.dta"
	local dtfiles : dir . files "*.dta"
	foreach file of local dtfiles {

		use "`file'", clear
			
			// create village ids for villages unmatched to the PCA
			// temp_unmatched_id is 1 only where pca01_id is missing; egen group() (without the
			// -missing- option) returns missing whenever one of its arguments is missing, so
			// unmatched_id is filled only for households with no pca01_id
			// NOTE(review): this runs once per district file, so unmatched_id numbering restarts
			// in every district -- it is not unique across the appended state-level file
		sort pca01_id st_code dt_code11_secc bk_code11_secc gr_code11_secc vi_code11_secc
		gen temp_unmatched_id = 1 if pca01_id==.
		egen unmatched_id = group(temp_unmatched_id st_code dt_code11_secc bk_code11_secc gr_code11_secc vi_code11_secc)
			// the ranges below depend on the column order of the household-level file
		drop st_code-vi_code11_secc n_male-hh_head temp*
		
			// create head-of-household indicator variables, for averaging
			// (each is a 0/1 dummy; households with a missing/blank value get 0, not missing)
		gen pct_hhh_female = hhh_gender=="F"
		gen pct_hhh_illit = hhh_edu_level=="IL"
		gen pct_hhh_midsch = inlist(hhh_edu_level,"MI","SE","HS","GR")
		gen pct_hhh_occ_agr = hhh_occupation=="AGR"
		gen pct_hhh_occ_wrk = hhh_occupation=="WRK"
		gen pct_hhh_occ_dom = hhh_occupation=="DOM"
		gen pct_hhh_occ_oth = hhh_occupation=="OTH"
				
			// create household-level indicator variables, for averaging
		gen pct_hh_scst = inlist(caste_group,"SC","ST")
		gen pct_hh_owned = inlist(hh_own,"O")
		gen pct_hh_mnth_inc_0_5 = inlist(highest_mnth_inc,"<5")
		gen pct_hh_mnth_inc_5_10 = inlist(highest_mnth_inc,"5-10")
			// ">10K" bracket: the string contains "10" but no "5"
		gen pct_hh_mnth_inc_10 = regexm(highest_mnth_inc,"10")==1 & regexm(highest_mnth_inc,"5")==0
			// one dummy per main-income-source code present in this district file
			// (name suffix = lower-cased code; a code absent from the file gets no variable)
		levelsof main_src_of_hh_inc, local(hhinc)
		foreach v in `hhinc' {
			local v2 = lower("`v'")
			gen pct_hh_mnth_inc_`v2' = main_src_of_hh_inc=="`v'"
		}
		gen pct_hh_mobile_phone = inlist(own_phone,"M","B")
		
			// give the numeric household variables their collapse-ready names:
			// avg_ prefix for the two averaged quantities, pct_hh_ prefix for the 0/1 dummies
			// (every rename is captured, so variables absent from this file are skipped)
		capture rename hhh_age avg_hhh_age
		capture rename hh_rooms avg_hh_rooms
		local hhvars bonded_labor manual_scavenger salaried_job irr_equip own_oprt_ent_reg_wt_gvt ///
			own_frig own_motor_veh mech_3_4_wheeler_agr_eqp credit_card own_any_land
		foreach hv of local hhvars {
			capture rename `hv' pct_hh_`hv'
		}
			// shorten "_wheeler_" to "_wh_" in any variable name that contains it
		capture rename *_wheeler_* *_wh_*
		
			// take unweighted average, label, save village-level dataset
			// (preserve/restore keeps the household-level data available for the
			//  population-weighted collapse that follows)
		preserve

			// one row per village: mean of every pct_*/avg_* variable plus the household count
		collapse (mean) pct_* avg_* (count) secc_n_hh=hh_id, by(pca01_id unmatched_id) fast
		la var unmatched_id "ID for villages not matched to PCA"
			// labels are captured because not every variable exists in every district file;
			// shortened names (e.g. pct_hh_own_oprt_ent) rely on variable-name abbreviation
		cap la var pct_hh_bonded_labor "Pct HH with member in bonded labor (unweighted)"
		cap la var pct_hh_manual_scavenger "Pct HH with member who is scavenger (unweighted)"
		cap la var pct_hh_salaried_job "Pct HH with member in salaried job (unweighted)"
		cap la var pct_hh_own_oprt_ent "Pct HH own/operated registered enterprise (unweighted)"
		cap la var pct_hh_own_frig "Pct HH that own frig (unweighted)"
		cap la var pct_hh_own_motor_veh "Pct HH own motor vehicle (unweighted)"
		cap la var pct_hh_mech_3_4 "Pct HH own ag mechanized wheeler (unweighted)"
		cap la var pct_hh_irr_equip "Pct HH own irrigation equipment (unweighted)"
		cap la var pct_hh_credit_card "Pct HH with credit card (unweighted)"
		cap la var pct_hh_own_any_land "Pct HH own any land (unweighted)"
		cap la var pct_hhh_female "Pct HH heads that are female (unweighted)"
		cap la var pct_hhh_illit "Pct HH heads illiterate (unweighted)"
		cap la var pct_hhh_midsch "Pct HH heads with middle school educ (unweighted)"
		cap la var pct_hhh_occ_agr "Pct HH heads, occ: agriculture (unweighted)"
		cap la var pct_hhh_occ_wrk "Pct HH heads, occ: worker (unweighted)"
		cap la var pct_hhh_occ_dom "Pct HH heads, occ: domestic (unweighted)"
		cap la var pct_hhh_occ_oth "Pct HH heads, occ: other (unweighted)"
		cap la var pct_hh_scst "Pct HH SC/ST (unweighted)"
		cap la var pct_hh_mnth_inc_0_5 "Pct HH monthly income <5K Rs (unweighted)"
		cap la var pct_hh_mnth_inc_5_10 "Pct HH monthly income 5-10K Rs (unweighted)"
		cap la var pct_hh_mnth_inc_10 "Pct HH monthly income >10K Rs (unweighted)"
		cap la var pct_hh_mnth_inc_cul "Pct HH main income: cultivation (unweighted)"
		cap la var pct_hh_mnth_inc_dom "Pct HH main income: domestic (unweighted)"
		cap la var pct_hh_mnth_inc_ent "Pct HH main income: enterprise (unweighted)"
		cap la var pct_hh_mnth_inc_for "Pct HH main income: foraging (unweighted)"
		cap la var pct_hh_mnth_inc_lab "Pct HH main income: labor (unweighted)"
		cap la var pct_hh_mnth_inc_oth "Pct HH main income: other (unweighted)"
		cap la var pct_hh_mobile_phone "Pct HH own mobile phone (unweighted)"
		cap la var avg_hhh_age "Avg age of head of HH (unweighted)"
		cap la var avg_hh_rooms "Avg number of rooms per household (unweighted)"
		cap la var secc_n_hh "Number of households included in SECC deprivations subsample"
		duplicates drop
		compress
			// Add the districts already written for this state. Only a missing file (first
			// district of the state) is tolerated: a blanket "cap append" would also hide a
			// genuine append failure, after which the save below would overwrite the
			// accumulated districts with the current one alone.
		cap confirm file "$secc_v/secc_pca_vill_st`st'_nowt.dta"
		if _rc==0 {
			append using "$secc_v/secc_pca_vill_st`st'_nowt.dta"
		}
		save "$secc_v/secc_pca_vill_st`st'_nowt.dta", replace

		restore
		
			// inflate by number of household members
			// Each household's value is scaled by n_tot/denom, so that summing within a village
			// gives the population-weighted average. The denominator is built separately for
			// every variable, over the households where that variable is non-missing: with one
			// shared denominator, a household with a missing value still added its n_tot to the
			// denominator while contributing nothing to the sum, biasing the average toward zero.
		foreach v of varlist pct* avg* {
			egen denom = total(n_tot) if !missing(`v'), by(pca01_id unmatched_id)
			replace `v' = n_tot*`v'/denom
			drop denom
			rename `v' w`v'
		}

			// take weighted sum (= population-weighted village average), label, save
			// NOTE: collapse (sum) returns 0, not missing, for a village where a variable is
			// missing for every household
		collapse (sum) wpct_* wavg_* , by(pca01_id unmatched_id) fast
			// labels are captured because not every variable exists in every district file;
			// shortened names (e.g. wpct_hh_own_oprt_ent) rely on variable-name abbreviation
		cap la var unmatched_id "ID for villages not matched to PCA"
		cap la var wpct_hh_bonded_labor "Pct HH with member in bonded labor (pop weighted)"
		cap la var wpct_hh_manual_scavenger "Pct HH with member who is scavenger (pop weighted)"
		cap la var wpct_hh_salaried_job "Pct HH with member in salaried job (pop weighted)"
		cap la var wpct_hh_own_oprt_ent "Pct HH own/operated registered enterprise (pop weighted)"
		cap la var wpct_hh_own_frig "Pct HH that own frig (pop weighted)"
		cap la var wpct_hh_own_motor_veh "Pct HH own motor vehicle (pop weighted)"
		cap la var wpct_hh_mech_3_4 "Pct HH own ag mechanized wheeler (pop weighted)"
		cap la var wpct_hh_irr_equip "Pct HH own irrigation equipment (pop weighted)"
		cap la var wpct_hh_credit_card "Pct HH with credit card (pop weighted)"
		cap la var wpct_hh_own_any_land "Pct HH own any land (pop weighted)"
		cap la var wpct_hhh_female "Pct HH heads that are female (pop weighted)"
		cap la var wpct_hhh_illit "Pct HH heads illiterate (pop weighted)"
		cap la var wpct_hhh_midsch "Pct HH heads with middle school educ (pop weighted)"
		cap la var wpct_hhh_occ_agr "Pct HH heads, occ: agriculture (pop weighted)"
		cap la var wpct_hhh_occ_wrk "Pct HH heads, occ: worker (pop weighted)"
		cap la var wpct_hhh_occ_dom "Pct HH heads, occ: domestic (pop weighted)"
		cap la var wpct_hhh_occ_oth "Pct HH heads, occ: other (pop weighted)"
		cap la var wpct_hh_scst "Pct HH SC/ST (pop weighted)"
		cap la var wpct_hh_mnth_inc_0_5 "Pct HH monthly income <5K Rs (pop weighted)"
		cap la var wpct_hh_mnth_inc_5_10 "Pct HH monthly income 5-10K Rs (pop weighted)"
		cap la var wpct_hh_mnth_inc_10 "Pct HH monthly income >10K Rs (pop weighted)"
		cap la var wpct_hh_mnth_inc_cul "Pct HH main income: cultivation (pop weighted)"
		cap la var wpct_hh_mnth_inc_dom "Pct HH main income: domestic (pop weighted)"
		cap la var wpct_hh_mnth_inc_ent "Pct HH main income: enterprise (pop weighted)"
		cap la var wpct_hh_mnth_inc_for "Pct HH main income: foraging (pop weighted)"
		cap la var wpct_hh_mnth_inc_lab "Pct HH main income: labor (pop weighted)"
		cap la var wpct_hh_mnth_inc_oth "Pct HH main income: other (pop weighted)"
		cap la var wpct_hh_mobile_phone "Pct HH own mobile phone (pop weighted)"
		cap la var wavg_hhh_age "Avg age of head of HH (pop weighted)"
		cap la var wavg_hh_rooms "Avg number of rooms per household (pop weighted)"
		duplicates drop
		compress
			// Add the districts already written for this state. Only a missing file (first
			// district of the state) is tolerated: a blanket "cap append" would also hide a
			// genuine append failure, after which the save below would overwrite the
			// accumulated districts with the current one alone.
		cap confirm file "$secc_v/secc_pca_vill_st`st'_popwt.dta"
		if _rc==0 {
			append using "$secc_v/secc_pca_vill_st`st'_popwt.dta"
		}
		save "$secc_v/secc_pca_vill_st`st'_popwt.dta", replace
	
	}
		
}		
}		
		
************************************************************** 
************************************************************** 

** 3. Collapse individual-level datasets to the village (PCA01_id) level
** 		(demographic variables that just need adding up) 

forvalues st = 1/36 {
if inlist(`st',4)==0 {

	cd "$secc_i/st`st'"
	cap erase "$secc_v/secc_pca_vill_st`st'.dta"
	local dtfiles : dir . files "*.dta"
	foreach file of local dtfiles {

		use "`file'", clear
		
		// confirm PCA01_id exists, and if not, create it
		cap count if pca01_id!=.
		cap gen pca01_id = .

		// drop households that should have been auto-excluded (drop specific househoulds)
		{
		cap drop if pca01_id == 91869 & hh_id == "052100119"
		cap drop if pca01_id == 94926 & hh_id == "030300106"
		cap drop if pca01_id == 94926 & hh_id == "030300066"
		cap drop if pca01_id == 587589 & hh_id == "026800116"
		cap drop if pca01_id == 587589 & hh_id == "026800058"
		cap drop if pca01_id == 524394 & hh_id == "005700003" // from here on, all from state 36 district 3
		cap drop if pca01_id == 524390 & hh_id == "003800003" // (all failed on pucca)
		cap drop if pca01_id == 524390 & hh_id == "003800004" // (all have burnt brick walls)
		cap drop if pca01_id == 524390 & hh_id == "003800005"
		cap drop if pca01_id == 524390 & hh_id == "003800006"
		cap drop if pca01_id == 524390 & hh_id == "003800012"
		cap drop if pca01_id == 524390 & hh_id == "003800016"
		cap drop if pca01_id == 524390 & hh_id == "003800018"
		cap drop if pca01_id == 524390 & hh_id == "003800019"
		cap drop if pca01_id == 524390 & hh_id == "003800020"
		cap drop if pca01_id == 524390 & hh_id == "003800021"
		cap drop if pca01_id == 524390 & hh_id == "003800023"
		cap drop if pca01_id == 524390 & hh_id == "003800024"
		cap drop if pca01_id == 524390 & hh_id == "003800025"
		cap drop if pca01_id == 524390 & hh_id == "003800026"
		cap drop if pca01_id == 524390 & hh_id == "003800028"
		cap drop if pca01_id == 524390 & hh_id == "003800032"
		cap drop if pca01_id == 524390 & hh_id == "003800064"
		cap drop if pca01_id == 524390 & hh_id == "003800074"
		cap drop if pca01_id == 524390 & hh_id == "003800075"
		cap drop if pca01_id == 524390 & hh_id == "003800076"
		cap drop if pca01_id == 524390 & hh_id == "003800077"
		cap drop if pca01_id == 524390 & hh_id == "003800078"
		cap drop if pca01_id == 524390 & hh_id == "003800080"
		cap drop if pca01_id == 524390 & hh_id == "003800081"
		cap drop if pca01_id == 524390 & hh_id == "003800083"
		cap drop if pca01_id == 524390 & hh_id == "003800101"
		cap drop if pca01_id == 524390 & hh_id == "003800106"
		cap drop if pca01_id == 524390 & hh_id == "003800109"
		cap drop if pca01_id == 524390 & hh_id == "003800110"
		cap drop if pca01_id == 524390 & hh_id == "003800112"
		cap drop if pca01_id == 524390 & hh_id == "003800114"
		cap drop if pca01_id == 524390 & hh_id == "003800117"
		cap drop if pca01_id == 524390 & hh_id == "003800118"
		cap drop if pca01_id == 524390 & hh_id == "003800119"
		cap drop if pca01_id == 524389 & hh_id == "003400060"
		}

			// correct auto-exclusion criteria that were miscoded at the individual level, but not for the whole household
			// (we're treating the (very few) errors as coding errors, NOT erroneous inclusions in the SECC deprivations dataset)
			//
			// Pattern used below: `cap gen` creates the variable only when the current file lacks it
			// (capture swallows the "already defined" error when it exists), the indicator is then reset,
			// and each `assert` halts the do-file if a household meeting an auto-exclusion criterion remains.
		** i. Motorized 2/3/4 wheeler/fishing boat (2 --> auto-exclusion)
		cap gen own_motor_veh = 0
		qui replace own_motor_veh = 0 	// this is consistently miscoded as Y=2, N=1 instead of Y=1, N=0

		** ii. Mechanized 3-4 wheeler agricultural equipment (1 --> auto-exclusion) 
		cap gen mech_3_4_wheeler_agr_eqp = 0
		qui replace mech_3_4_wheeler_agr_eqp = 0 

		** iii. Kisan credit card with credit limit of over Rs. 50,000/- (1 --> auto-exclusion)
		cap gen credit_card = 0
		qui replace credit_card = 0 if credit_card!=.	// missing values are left missing

		** iv. Household member government employee (YES --> auto-exclusion)
		*  (category was dropped in original data build because we felt it was not relevant to our analysis)

		** v. Households with non-agricultural enterprises registered with government (1 --> auto-exclusion)
		cap gen own_oprt_ent_reg_wt_gvt = 0
		qui replace own_oprt_ent_reg_wt_gvt = 0 if own_oprt_ent_reg_wt_gvt!=.

		** vi. Any member of household earning more than Rs. 10,000 per month (>10k --> auto-exclusion) 
		assert highest_mnth_inc=="5-10" | highest_mnth_inc=="<5" if highest_mnth_inc!=""

		** vii. Paying income tax (YES --> auto-exclusion)
		*  (category was dropped in original data build because we felt it was not relevant to our analysis)

		** viii. Paying professional tax (YES --> auto-exclusion)
		*  (category was dropped in original data build because we felt it was not relevant to our analysis)
		 
		** ix. 3 or more rooms with pucca walls and roof (3+pucca --> auto-exclusion)
		qui gen pucca_walls = inlist(wall_mat,"BBK","CON","STM") 
		qui gen pucca_roof = inlist(roof_mat,"BBK","CON","GMA","MMT","SLA","STO")
		cap gen hh_rooms = 1
			// the next two lines only take effect when hh_rooms is stored as a string
		cap replace hh_rooms = "1" if inlist(hh_rooms,"AV","KUR")
		cap destring hh_rooms, replace
			// Stata treats numeric missing as larger than any number, so an unguarded hh_rooms>=3
			// would flag households whose room count is unknown; exclude missing explicitly
			// (criteria xii-xiv below guard against missing in the same spirit)
		qui gen pucca_3room = hh_rooms>=3 & !missing(hh_rooms) & pucca_walls==1 & pucca_roof==1
		assert pucca_3room == 0
		cap drop pucca_walls pucca_roof pucca_3room

		** x. Owns a refrigerator (1 --> auto-exclusion)
		cap gen own_frig = 0
		qui replace own_frig = 0 if own_frig!=.

		** xi. Owns landline phone ("L" --> auto-exclusion)
		cap gen own_phone = "N"
			// temp_mode: modal non-"L" answer within the household (smallest value on ties, via minmode);
			// temp_mode2 spreads that value to every member, including those coded "L"
		qui egen temp_mode = mode(own_phone) if own_phone!="L", by(st_code dt_code bk_code gr_code tn_code hh_id pca01_id) minmode
		qui egen temp_mode2 = mode(temp_mode), by(st_code dt_code bk_code gr_code tn_code hh_id pca01_id) minmode
		qui replace own_phone = temp_mode2 if own_phone=="L" & inlist(temp_mode2,"N","M")
			// fails if an "L" could not be recoded, i.e. no household member reported "N" or "M"
		assert inlist(own_phone,"N","M","")
		cap drop temp_mode*
		
		** xii. Owns more than 2.5 acres of irrigated land with 1 irrigation equipment (>2.5 & 1 --> auto-exclusion)
		cap gen totalirr = 0
		cap gen irr_equip = 0
			// string-typed land variables may hold "YES"/"NO"; blank them so destring can convert the rest
		cap replace totalirr = "" if inlist(totalirr,"YES", "NO")
		cap destring totalirr, replace
		qui gen crit12 = totalirr>2.5 & irr_equip==1
		assert crit12==0 if totalirr!=.
		cap drop crit12
		
		** xiii. 5 acres or more of irrigated land for two or more crop seasons (>5 --> auto-exclusion)
		assert totalirr<5 if totalirr!=.

		** xiv. Owning at least 7.5 acres of land or more with at least one irrigation equipment.
		cap gen totalunirr = 0
		cap gen otherirr = 0
		cap replace totalunirr = "" if inlist(totalunirr,"YES","NO")
		cap replace otherirr = "" if inlist(otherirr,"YES","NO")
		cap destring totalunirr otherirr, replace
			// all_land is missing whenever any of its three components is missing
		qui gen all_land = totalirr + totalunirr + otherirr
		qui gen crit14 = all_land>=7.5 & irr_equip==1
		assert crit14==0 if all_land!=.
		cap drop all_land crit14

		// create village ids for villages unmatched to the PCA
		// (egen group() leaves unmatched_id missing wherever temp_unmatched_id is missing,
		//  so only rows with pca01_id==. receive an unmatched_id)
		sort pca01_id st_code dt_code bk_code gr_code tn_code
		gen temp_unmatched_id = 1 if pca01_id==.
		egen unmatched_id = group(temp_unmatched_id st_code dt_code bk_code gr_code tn_code)

		// people in village by gender
		// (each egen fills only the rows satisfying its -if-; the remaining rows of the
		//  village are filled in by the loop at the end of this block)
		assert gender=="M" | gender=="F" | gender==""
		egen n_tot = count(person_id), by(pca01_id unmatched_id)
		egen n_male = count(person_id) if gender=="M", by(pca01_id unmatched_id)
		egen n_female = count(person_id) if gender=="F", by(pca01_id unmatched_id)

			// people in village by age
			// NOTE: Stata treats numeric missing as larger than any number, so open-ended
			// comparisons (age>60, age>=16) must exclude missing ages explicitly; otherwise
			// people of unknown age are counted as 61+ / as adults. inrange() already
			// returns 0 for a missing age.
		egen n_age_0_6 = count(person_id) if inrange(age,0,6), by(pca01_id unmatched_id)
		egen n_age_7_15 = count(person_id) if inrange(age,7,15), by(pca01_id unmatched_id)
		egen n_age_16_25 = count(person_id) if inrange(age,16,25), by(pca01_id unmatched_id)
		egen n_age_26_40 = count(person_id) if inrange(age,26,40), by(pca01_id unmatched_id)
		egen n_age_41_60 = count(person_id) if inrange(age,41,60), by(pca01_id unmatched_id)
		egen n_age_61_plus = count(person_id) if age>60 & !missing(age), by(pca01_id unmatched_id)

			// people in village by gender and age
		egen n_m_age_0_6 = count(person_id) if inrange(age,0,6) & gender=="M", by(pca01_id unmatched_id)
		egen n_m_age_7_15 = count(person_id) if inrange(age,7,15) & gender=="M", by(pca01_id unmatched_id)
		egen n_m_age_16_25 = count(person_id) if inrange(age,16,25) & gender=="M", by(pca01_id unmatched_id)
		egen n_m_age_26_40 = count(person_id) if inrange(age,26,40) & gender=="M", by(pca01_id unmatched_id)
		egen n_m_age_41_60 = count(person_id) if inrange(age,41,60) & gender=="M", by(pca01_id unmatched_id)
		egen n_m_age_61_plus = count(person_id) if age>60 & !missing(age) & gender=="M", by(pca01_id unmatched_id)
		egen n_f_age_0_6 = count(person_id) if inrange(age,0,6) & gender=="F", by(pca01_id unmatched_id)
		egen n_f_age_7_15 = count(person_id) if inrange(age,7,15) & gender=="F", by(pca01_id unmatched_id)
		egen n_f_age_16_25 = count(person_id) if inrange(age,16,25) & gender=="F", by(pca01_id unmatched_id)
		egen n_f_age_26_40 = count(person_id) if inrange(age,26,40) & gender=="F", by(pca01_id unmatched_id)
		egen n_f_age_41_60 = count(person_id) if inrange(age,41,60) & gender=="F", by(pca01_id unmatched_id)
		egen n_f_age_61_plus = count(person_id) if age>60 & !missing(age) & gender=="F", by(pca01_id unmatched_id)

			// other within-village demographic counts, by age, for adults
		egen n_m_16_single = count(person_id) if age>=16 & !missing(age) & gender=="M" & marital=="N", by(pca01_id unmatched_id) // M adults that are single and never married
		egen n_f_16_single = count(person_id) if age>=16 & !missing(age) & gender=="F" & marital=="N", by(pca01_id unmatched_id) // F adults that are single and never married
		egen n_m_16_illit = count(person_id) if age>=16 & !missing(age) & gender=="M" & edu_level=="IL", by(pca01_id unmatched_id) // M adults that are illiterate
		egen n_f_16_illit = count(person_id) if age>=16 & !missing(age) & gender=="F" & edu_level=="IL", by(pca01_id unmatched_id) // F adults that are illiterate
		egen n_m_16_midsch = count(person_id) if age>=16 & !missing(age) & gender=="M" & inlist(edu_level,"MI","SE","HS","GR"), by(pca01_id unmatched_id) // M adults with at least middle school education
		egen n_f_16_midsch = count(person_id) if age>=16 & !missing(age) & gender=="F" & inlist(edu_level,"MI","SE","HS","GR"), by(pca01_id unmatched_id) // F adults with at least middle school education

			// within-village job counts, by age, for adults
			// (one set of four variables per occupation code present in this file;
			//  the variable-name suffix is the lower-cased code)
		levelsof occupation, local(jobs)
		foreach j in `jobs' {
			local j2 = lower("`j'")
			egen n_m_occ_`j2' = count(person_id) if gender=="M" & occupation=="`j'", by(pca01_id unmatched_id) // M
			egen n_f_occ_`j2' = count(person_id) if gender=="F" & occupation=="`j'", by(pca01_id unmatched_id) // F
			egen n_m_16_occ_`j2' = count(person_id) if gender=="M" & occupation=="`j'" & age>=16 & !missing(age), by(pca01_id unmatched_id) // M adults
			egen n_f_16_occ_`j2' = count(person_id) if gender=="F" & occupation=="`j'" & age>=16 & !missing(age), by(pca01_id unmatched_id) // F adults
		}

		// replace missings with zeros for all within-village person counts 
		// (first spread each count to every row of its village via the within-village mean
		//  of the non-missing rows, then set villages with no qualifying person to 0)
		foreach v of varlist n_* {
			egen temp = mean(`v'), by(pca01_id unmatched_id)
			replace `v' = temp if `v' == .
			drop temp
			replace `v' = 0 if `v'==.
		}	

		// collapse to village level
		// (the n_* counts are constant within a village, so their mean is the count itself;
		//  the land variables are summed over all rows of the village)
		cap destring totalirr totalunirr, force replace
		collapse (mean) n_* (sum) totalirr totalunirr, by(pca01_id unmatched_id) fast
			// evaluates to missing when the village has no irrigated or unirrigated land (0/0)
		gen pct_land_irr = totalirr/(totalirr+totalunirr)
		
		// new variable labels, and updating existing labels
		la var n_tot "Total people in village (surveyed by SECC)"
		la var n_male "Total males in village (surveyed by SECC)"
		la var n_female "Total females in village (surveyed by SECC)"
		la var n_age_0_6 "pop, ages 0-6"
		la var n_age_7_15 "pop, ages 7-15"
		la var n_age_16_25 "pop, ages 16-25"
		la var n_age_26_40 "pop, ages 26-40"
		la var n_age_41_60 "pop, ages 41-60"
		la var n_age_61_plus "pop, ages 61 and up"
		la var n_m_age_0_6 "M pop, ages 0-6"
		la var n_m_age_7_15 "M pop, ages 7-15"
		la var n_m_age_16_25 "M pop, ages 16-25"
		la var n_m_age_26_40 "M pop, ages 26-40"
		la var n_m_age_41_60 "M pop, ages 41-60"
		la var n_m_age_61_plus "M pop, ages 61 and up"
		la var n_f_age_0_6 "F pop, ages 0-6"
		la var n_f_age_7_15 "F pop, ages 7-15"
		la var n_f_age_16_25 "F pop, ages 16-25"
		la var n_f_age_26_40 "F pop, ages 26-40"
		la var n_f_age_41_60 "F pop, ages 41-60"
		la var n_f_age_61_plus "F pop, ages 61 and up"
		la var n_m_16_single "M pop, 16+ and never married"
		la var n_f_16_single "F pop, 16+ and never married"
		la var n_m_16_illit "M pop, 16+ and illiterate"
		la var n_f_16_illit "F pop, 16+ and illiterate"
		la var n_m_16_midsch "M pop, 16+ and at least middle school education"
		la var n_f_16_midsch "F pop, 16+ and at least middle school education"
			// occupation variables exist only for the codes present in this file, hence -cap-
		cap la var n_m_occ_agr "M pop, occup: agriculture"
		cap la var n_m_occ_dep "M pop, occup: dependent"
		cap la var n_m_occ_dom "M pop, occup: domestic"
		cap la var n_m_occ_nil "M pop, occup: none"
		cap la var n_m_occ_oth "M pop, occup: other"
		cap la var n_m_occ_stu "M pop, occup: student"
		cap la var n_m_occ_wrk "M pop, occup: worker"
		cap la var n_f_occ_agr "F pop, occup: agriculture"
		cap la var n_f_occ_dep "F pop, occup: dependent"
		cap la var n_f_occ_dom "F pop, occup: domestic"
		cap la var n_f_occ_nil "F pop, occup: none"
		cap la var n_f_occ_oth "F pop, occup: other"
		cap la var n_f_occ_stu "F pop, occup: student"
		cap la var n_f_occ_wrk "F pop, occup: worker"
		cap la var n_m_16_occ_agr "M pop, 16+, occup: agriculture"
		cap la var n_m_16_occ_dep "M pop, 16+, occup: dependent"
		cap la var n_m_16_occ_dom "M pop, 16+, occup: domestic"
		cap la var n_m_16_occ_nil "M pop, 16+, occup: none"
		cap la var n_m_16_occ_oth "M pop, 16+, occup: other"
		cap la var n_m_16_occ_stu "M pop, 16+, occup: student"
		cap la var n_m_16_occ_wrk "M pop, 16+, occup: worker"
		cap la var n_f_16_occ_agr "F pop, 16+, occup: agriculture"
		cap la var n_f_16_occ_dep "F pop, 16+, occup: dependent"
		cap la var n_f_16_occ_dom "F pop, 16+, occup: domestic"
		cap la var n_f_16_occ_nil "F pop, 16+, occup: none"
		cap la var n_f_16_occ_oth "F pop, 16+, occup: other"
		cap la var n_f_16_occ_stu "F pop, 16+, occup: student"
		cap la var n_f_16_occ_wrk "F pop, 16+, occup: worker"
		cap la var pct_land_irr "Pct of owned land irrigated"

		duplicates drop
		compress
			// Accumulate this file's villages onto the state-level file. Only a not-yet-existing
			// state file is tolerated; any other append failure must stop the do-file, because
			// the -save, replace- below would otherwise overwrite the villages accumulated so far
			// with the current file's rows only.
			// NOTE(review): a state file left over from an earlier run is appended to as well;
			// presumably it is erased before a state's first file is processed -- confirm.
		cap confirm file "$secc_v/secc_pca_vill_st`st'.dta"
		if _rc==0 {
			append using "$secc_v/secc_pca_vill_st`st'.dta"
		}
		save "$secc_v/secc_pca_vill_st`st'.dta", replace		

	}
}
}

************************************************************** 
************************************************************** 

** 4. Merge/append village-level datasets into a single panel
** 		(demographic variables that just need adding up) 

{

	// create row_id for merging (since I messed up the unmatched identifiers)
	// row_id is the row number within each state file, so the merge below relies on the
	// three files of a state (base, _nowt, _popwt) listing villages in the same order.
	// -cap gen- leaves row_id / st_code untouched if a file already carries them.
	// NOTE(review): the _nowt and _popwt files are not created in this part of the
	// do-file; presumably an earlier section builds them -- confirm.
forvalues st = 1/36 {
if inlist(`st',4)==0 {

	use "$secc_v/secc_pca_vill_st`st'.dta", clear
	cap gen row_id = _n
	cap gen st_code = `st'
	save "$secc_v/secc_pca_vill_st`st'.dta", replace
	
	use "$secc_v/secc_pca_vill_st`st'_nowt.dta", clear
	cap gen row_id = _n
	cap gen st_code = `st'
	save "$secc_v/secc_pca_vill_st`st'_nowt.dta", replace

	use "$secc_v/secc_pca_vill_st`st'_popwt.dta", clear
	cap gen row_id = _n
	cap gen st_code = `st'
	save "$secc_v/secc_pca_vill_st`st'_popwt.dta", replace

}
}

	// merge on row_id, confirming that pca01_id and unmatched_id are the same
	// start from a clean slate so a re-run does not append onto a previous run's output
cap erase "$secc/secc_pca_vill_all.dta"	
forvalues st = 1/36 {
if inlist(`st',4)==0 {

		// -use- accepts -clear- (not -replace-) to discard the data currently in memory
	use "$secc_v/secc_pca_vill_st`st'.dta", clear
		// keep the base file's ids under an "M" suffix so they can be compared against
		// the ids brought in by each merged file
	rename pca01_id pca01_idM
	rename unmatched_id unmatched_idM
	merge 1:1 row_id using  "$secc_v/secc_pca_vill_st`st'_nowt.dta"
	assert _merge==3
	assert pca01_id==pca01_idM
	assert unmatched_id==unmatched_idM
	drop pca01_id unmatched_id _merge
	merge 1:1 row_id using  "$secc_v/secc_pca_vill_st`st'_popwt.dta"
	assert _merge==3
	assert pca01_id==pca01_idM
	assert unmatched_id==unmatched_idM
	
	drop pca01_idM unmatched_idM _merge
	order row_id st_code pca01_id unmatched_id 
	rename row_id secc_v_id
		// the original statement had no label text, which only clears the label
	la var secc_v_id "SECC village row id (within state)"
	
	compress
	duplicates drop
		// Only a not-yet-existing panel file (first state processed) is tolerated; any other
		// append failure must stop the do-file, because the -save, replace- below would
		// otherwise overwrite the states accumulated so far with the current state only.
	cap confirm file "$secc/secc_pca_vill_all.dta"
	if _rc==0 {
		append using "$secc/secc_pca_vill_all.dta"	
	}
	save "$secc/secc_pca_vill_all.dta", replace
}
}


	// copy the finished village panel into the panel directory
use  "$secc/secc_pca_vill_all.dta", clear
save "$panel/secc_pca_vill_all.dta", replace

}

************************************************************** 
************************************************************** 
